# 1) Get the Spark entry point: SparkSession
# master can be yarn / mesos / local
# local: run Spark in memory on the local machine
from pyspark.sql import SparkSession
from pyspark.sql.functions import when, col, avg, max, min
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType

spark = SparkSession.builder \
    .appName("HelloSpark") \
    .master("local") \
    .getOrCreate()
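
# A minimal sketch of alternative master settings (commented out; this script uses "local"):
#   .master("local[*]")   # local mode, using all available CPU cores
#   .master("yarn")       # submit to a YARN cluster (requires HADOOP_CONF_DIR/YARN_CONF_DIR to be set)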

# Sample users; the last row has a cityId with no matching city (kept by the left join below)
user_df = spark.createDataFrame(
    [(0, "Lucy", 0), (1, "Lily", 0), (2, "Tim", 2), (3, "Danial", 0), (9999, "Alien", 23423)],
    ["id", "name", "cityId"])

city_df = spark.createDataFrame([(0, "Beijing"), (1, "Shanghai"), (2, "Guangzhou")], ["id", "name"])

user_df.show()
city_df.show()

# Basic join syntax: both DataFrames have "id" and "name" columns, so each selected
# column is qualified by its source DataFrame and the city name is aliased
user_df.join(city_df, user_df['cityId'] == city_df['id'], how='left') \
    .select(user_df['id'], user_df['name'], city_df['name'].alias('cityName')) \
    .show()
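
# An alternative sketch for avoiding the duplicate column names: rename the city columns
# up front with withColumnRenamed (a standard DataFrame method), then join on the shared
# column name so the result needs no DataFrame-qualified references
renamed_city_df = city_df.withColumnRenamed("id", "cityId").withColumnRenamed("name", "cityName")
user_df.join(renamed_city_df, on="cityId", how="left") \
    .select("id", "name", "cityName") \
    .show()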

# Join types
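# A minimal sketch of the common values accepted by the how= parameter, reusing the same
# join condition as above. left_semi/left_anti return only columns from the left DataFrame.
join_cond = user_df['cityId'] == city_df['id']
for how in ["inner", "left", "right", "outer", "left_semi", "left_anti"]:
    print(f"--- how={how} ---")
    user_df.join(city_df, join_cond, how=how).show()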
